library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
Data source:
https://www.kaggle.com/datasets/cdc/national-health-and-nutrition-examination-survey?select=labs.csv
Details:
https://www.cdc.gov/Nchs/Nhanes/about_nhanes.htm
demographic <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/demographic.xlsx"))
demographiDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/demographic.xlsx",sheet = "Sheet1"))
rownames(demographic) <- demographic$SEQN
demographic$SEQN <- NULL
vartokeep <- demographiDesc[demographiDesc$Keep=="Yes",1]
demographic <- demographic[,vartokeep]
keepColumn <- apply(!is.na(demographic),2,sum)/nrow(demographic) > 0.75
demographic <- demographic[,keepColumn]
demographic <- demographic[complete.cases(demographic),]
## Examination data
examination <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/examination.xlsx"))
examinationDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/examination.xlsx",sheet = "Sheet1"))
rownames(examination) <- examination$SEQN
examination$SEQN <- NULL
bpresDI <- examination[,c("BPXDI1","BPXDI2","BPXDI3","BPXDI4")]
examination[,c("BPXDI1","BPXDI2","BPXDI3","BPXDI4")] <- NULL
bpresDIM <- apply(bpresDI,1,mean,na.rm=TRUE)
summary(bpresDIM)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.00 58.00 66.67 65.44 74.67 128.00 2282
bpresSY <- examination[,c("BPXSY1","BPXSY2","BPXSY3","BPXSY4")]
examination[,c("BPXSY1","BPXSY2","BPXSY3","BPXSY4")] <- NULL
bpresSYM <- apply(bpresSY,1,mean,na.rm=TRUE)
summary(bpresSYM)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 64.67 106.00 115.33 118.31 128.00 228.67 2282
examination$PEASCST1 <- NULL
examination$bpresDIM <- bpresDIM
examination$bpresSYM <- bpresSYM
keepColumn <- apply(!is.na(examination),2,sum)/nrow(examination) > 0.75
examination <- examination[,keepColumn]
keepColumnVar <- !is.na(apply(examination,2,var,na.rm = TRUE))
examination <- examination[,keepColumnVar]
examination <- examination[complete.cases(examination),]
examination <- examination[examination$bpresDIM>0,]
## questionnaire data
questionnaire <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/questionnaire.xlsx"))
questionnaireDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/questionnaire.xlsx",sheet = "Sheet1"))
rownames(questionnaire) <- questionnaire$SEQN
questionnaire$SEQN <- NULL
table(questionnaire$MCQ010)
#>
#> 1 2 7 9
#> 1538 8222 1 8
#keepColumn <- apply(!is.na(questionnaire),2,sum)/nrow(questionnaire) > 0.85
#questionnaire <- questionnaire[,keepColumn]
#keepColumnVar <- !is.na(apply(questionnaire,2,var,na.rm = TRUE))
#questionnaire <- questionnaire[,keepColumnVar]
#questionnaire <- questionnaire[complete.cases(questionnaire),]
#keeprow <- apply(questionnaire,2,max) < 9000
#questionnaire <- questionnaire[keeprow,]
#table(questionnaire$MCQ010)
## Diabetes
Diabetes <- questionnaire$MCQ010
names(Diabetes) <- rownames(questionnaire)
Diabetes <- Diabetes[!is.na(Diabetes)]
Diabetes <- Diabetes[Diabetes<3]
table(Diabetes)
#> Diabetes
#> 1 2
#> 1538 8222
## Depression
depression <- questionnaire[,c("DPQ010","DPQ020","DPQ030","DPQ040","DPQ050","DPQ060","DPQ070","DPQ080","DPQ090")]
depression[depression>5] <- NA
depressionTot <- apply(depression,1,sum,na.rm=TRUE)
donnow <- apply(is.na(depression),1,sum) == 9
depressionTot <- depressionTot[!donnow]
table(depressionTot)
#> depressionTot
#> 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#> 1718 852 631 470 360 233 214 162 137 104 90 66 53 51 57 31
#> 16 17 18 19 20 21 22 23 24 25 27
#> 39 26 27 19 16 12 11 4 7 4 3
## The labs data
labs <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/labs.xlsx"))
labsDesc <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/NHNES/labs.xlsx",sheet = "Sheet1"))
rownames(labs) <- labs$SEQN
labs$SEQN <- NULL
keepColumn <- apply(!is.na(labs),2,sum)/nrow(labs) > 0.75
labs <- labs[,keepColumn]
keepColumnVar <- !is.na(apply(labs,2,var,na.rm = TRUE))
labs <- labs[,keepColumnVar]
labs <- labs[complete.cases(labs),]
includeIDS <- rownames(labs)[rownames(labs) %in% rownames(examination)]
includeIDS <- includeIDS[includeIDS %in% rownames(demographic)]
includeIDS <- includeIDS[includeIDS %in% names(Diabetes)]
Diab <- Diabetes[includeIDS]
table(Diab)
#> Diab
#> 1 2
#> 992 5006
NHNES <- cbind(demographic[includeIDS,],examination[includeIDS,],labs[includeIDS,])
NHNES$Diab <- Diab
table(NHNES$Diab)
#>
#> 1 2
#> 992 5006
## Only Adults in this experiment
NHNES <- NHNES[NHNES$RIDAGEYR >= 20,]
NHNES$Diab <- 1*(NHNES$Diab == 1)
table(NHNES$Diab)
#>
#> 0 1
#> 3664 650
includeIDS <- rownames(NHNES)
includeIDS <- includeIDS[includeIDS %in% names(depressionTot)]
depressionTot <- depressionTot[includeIDS]
NHNESDep <- cbind(NHNES[includeIDS,],depressionTot)
NHNESDep$depressionTot <- log(NHNESDep$depressionTot+1)
tbdep <- table(NHNESDep$depressionTot)
genderDiab <- NHNESDep[,c("RIAGENDR","Diab")]
isContinuous <- sapply(apply(NHNESDep,2,unique),length) > 4
sum(isContinuous)
#> [1] 63
NHNESDep <- cbind(NHNESDep[,isContinuous],genderDiab)
table(NHNESDep$RIAGENDR)
#>
#> 1 2
#> 2059 2094
colnames(NHNESDep)[sapply(apply(NHNESDep,2,unique),length) < 10]
#> [1] "OHX04TC" "OHX06TC" "OHX10TC" "OHX11TC" "OHX13TC" "OHX20TC"
#> [7] "OHX25TC" "OHX29TC" "LBDBANO" "RIAGENDR" "Diab"
studyName <- "NHNES"
dataframe <- NHNESDep
outcome <- "RIDAGEYR"
thro <- 0.40
cexheat <- 0.35
loops <- 30
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
library(multiColl)
library(car)
library("pls")
source("C:/Users/jtame/Documents/GitHub/LatentBiomarkers/RMD/RepeatedLinearCV.R")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 4153 | 64 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
set.seed(1)
trainsamples <- sample(nrow(dataframe),3*nrow(dataframe)/4)
trainingset <- dataframe[trainsamples,]
testingset <- dataframe[-trainsamples,]
pander::pander(t(summary(trainingset)))
| RIDAGEYR | Min. :20.00 | 1st Qu.:33.00 | Median :47.00 | Mean :48.03 | 3rd Qu.:62.00 | Max. :80.00 |
| WTINT2YR | Min. : 5735 | 1st Qu.: 19354 | Median : 28735 | Mean : 41781 | 3rd Qu.: 59117 | Max. :167885 |
| WTMEC2YR | Min. : 6107 | 1st Qu.: 19999 | Median : 29753 | Mean : 43054 | 3rd Qu.: 60384 | Max. :171075 |
| PEASCTM1 | Min. : 13.0 | 1st Qu.: 664.0 | Median : 755.0 | Mean : 794.6 | 3rd Qu.: 899.0 | Max. :2056.0 |
| BPXPLS | Min. : 42.00 | 1st Qu.: 64.00 | Median : 72.00 | Mean : 72.51 | 3rd Qu.: 80.00 | Max. :140.00 |
| BPXML1 | Min. :110.0 | 1st Qu.:140.0 | Median :140.0 | Mean :147.8 | 3rd Qu.:160.0 | Max. :220.0 |
| BMXWT | Min. : 32.80 | 1st Qu.: 66.80 | Median : 78.80 | Mean : 81.67 | 3rd Qu.: 92.90 | Max. :195.40 |
| BMXHT | Min. :140.9 | 1st Qu.:160.3 | Median :167.5 | Mean :167.8 | 3rd Qu.:175.1 | Max. :202.6 |
| BMXBMI | Min. :14.10 | 1st Qu.:24.20 | Median :27.90 | Mean :28.94 | 3rd Qu.:32.30 | Max. :64.70 |
| BMXLEG | Min. :26.00 | 1st Qu.:36.60 | Median :39.00 | Mean :39.06 | 3rd Qu.:41.60 | Max. :51.90 |
| BMXARML | Min. :28.70 | 1st Qu.:35.30 | Median :37.30 | Mean :37.36 | 3rd Qu.:39.30 | Max. :46.50 |
| BMXARMC | Min. :19.00 | 1st Qu.:29.60 | Median :32.80 | Mean :33.13 | 3rd Qu.:36.10 | Max. :57.00 |
| BMXWAIST | Min. : 55.50 | 1st Qu.: 87.53 | Median : 97.80 | Mean : 99.12 | 3rd Qu.:108.47 | Max. :164.20 |
| MGXH1T1 | Min. : 6.50 | 1st Qu.:25.00 | Median :31.00 | Mean :32.68 | 3rd Qu.:40.00 | Max. :81.50 |
| MGXH2T1 | Min. : 5.80 | 1st Qu.:25.40 | Median :31.80 | Mean :33.41 | 3rd Qu.:40.90 | Max. :79.50 |
| MGXH1T2 | Min. : 5.2 | 1st Qu.:26.1 | Median :32.4 | Mean :34.3 | 3rd Qu.:41.8 | Max. :75.9 |
| MGXH2T2 | Min. : 6.00 | 1st Qu.:26.40 | Median :33.00 | Mean :34.79 | 3rd Qu.:42.60 | Max. :81.40 |
| MGXH1T3 | Min. : 9.20 | 1st Qu.:26.60 | Median :33.10 | Mean :34.96 | 3rd Qu.:42.50 | Max. :81.20 |
| MGXH2T3 | Min. : 6.60 | 1st Qu.:26.43 | Median :33.30 | Mean :35.17 | 3rd Qu.:43.10 | Max. :82.80 |
| MGDCGSZ | Min. : 17.90 | 1st Qu.: 55.30 | Median : 68.25 | Mean : 72.10 | 3rd Qu.: 88.25 | Max. :162.80 |
| OHX04TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.494 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX06TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.294 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX10TC | Min. :2.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.357 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX11TC | Min. :1.0 | 1st Qu.:2.0 | Median :2.0 | Mean :2.3 | 3rd Qu.:2.0 | Max. :5.0 |
| OHX13TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.501 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX20TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.367 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX25TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.215 | 3rd Qu.:2.000 | Max. :5.000 |
| OHX29TC | Min. :1.000 | 1st Qu.:2.000 | Median :2.000 | Mean :2.375 | 3rd Qu.:2.000 | Max. :5.000 |
| bpresDIM | Min. : 11.33 | 1st Qu.: 63.33 | Median : 70.67 | Mean : 70.00 | 3rd Qu.: 76.67 | Max. :128.00 |
| bpresSYM | Min. : 79.33 | 1st Qu.:110.67 | Median :120.00 | Mean :122.63 | 3rd Qu.:132.00 | Max. :212.00 |
| URXUMA | Min. : 0.21 | 1st Qu.: 4.10 | Median : 7.70 | Mean : 44.17 | 3rd Qu.: 16.20 | Max. :9600.00 |
| URXUMS | Min. : 0.21 | 1st Qu.: 4.10 | Median : 7.70 | Mean : 44.17 | 3rd Qu.: 16.20 | Max. :9600.00 |
| URXUCR.x | Min. : 8.0 | 1st Qu.: 59.0 | Median :106.0 | Mean :120.4 | 3rd Qu.:162.0 | Max. :659.0 |
| URXCRS | Min. : 707.2 | 1st Qu.: 5215.6 | Median : 9370.4 | Mean :10643.3 | 3rd Qu.:14320.8 | Max. :58255.6 |
| URDACT | Min. : 0.26 | 1st Qu.: 4.74 | Median : 7.14 | Mean : 44.65 | 3rd Qu.: 13.20 | Max. :9000.00 |
| LBXWBCSI | Min. : 2.600 | 1st Qu.: 5.700 | Median : 7.000 | Mean : 7.303 | 3rd Qu.: 8.400 | Max. :55.700 |
| LBXLYPCT | Min. : 4.00 | 1st Qu.:24.90 | Median :30.00 | Mean :30.52 | 3rd Qu.:35.60 | Max. :88.00 |
| LBXMOPCT | Min. : 1.300 | 1st Qu.: 6.700 | Median : 7.900 | Mean : 8.191 | 3rd Qu.: 9.400 | Max. :38.900 |
| LBXNEPCT | Min. : 8.80 | 1st Qu.:51.90 | Median :58.40 | Mean :57.87 | 3rd Qu.:64.00 | Max. :90.80 |
| LBXEOPCT | Min. : 0.000 | 1st Qu.: 1.500 | Median : 2.200 | Mean : 2.739 | 3rd Qu.: 3.400 | Max. :28.300 |
| LBXBAPCT | Min. :0.0000 | 1st Qu.:0.5000 | Median :0.7000 | Mean :0.7408 | 3rd Qu.:0.9000 | Max. :5.8000 |
| LBDLYMNO | Min. : 0.40 | 1st Qu.: 1.70 | Median : 2.10 | Mean : 2.18 | 3rd Qu.: 2.60 | Max. :49.00 |
| LBDMONO | Min. :0.1000 | 1st Qu.:0.4000 | Median :0.6000 | Mean :0.5807 | 3rd Qu.:0.7000 | Max. :3.4000 |
| LBDNENO | Min. : 0.400 | 1st Qu.: 3.100 | Median : 4.000 | Mean : 4.297 | 3rd Qu.: 5.100 | Max. :15.800 |
| LBDEONO | Min. :0.0000 | 1st Qu.:0.1000 | Median :0.2000 | Mean :0.1965 | 3rd Qu.:0.2000 | Max. :3.1000 |
| LBDBANO | Min. :0.00000 | 1st Qu.:0.00000 | Median :0.00000 | Mean :0.04672 | 3rd Qu.:0.10000 | Max. :0.80000 |
| LBXRBCSI | Min. :2.890 | 1st Qu.:4.360 | Median :4.650 | Mean :4.672 | 3rd Qu.:4.970 | Max. :8.300 |
| LBXHGB | Min. : 6.40 | 1st Qu.:13.10 | Median :14.10 | Mean :14.07 | 3rd Qu.:15.10 | Max. :18.90 |
| LBXHCT | Min. :21.70 | 1st Qu.:38.90 | Median :41.60 | Mean :41.58 | 3rd Qu.:44.30 | Max. :55.90 |
| LBXMCVSI | Min. : 55.70 | 1st Qu.: 86.40 | Median : 89.60 | Mean : 89.22 | 3rd Qu.: 92.80 | Max. :108.70 |
| LBXMCHSI | Min. :16.80 | 1st Qu.:29.10 | Median :30.40 | Mean :30.19 | 3rd Qu.:31.60 | Max. :46.40 |
| LBXMC | Min. :28.00 | 1st Qu.:33.30 | Median :33.80 | Mean :33.81 | 3rd Qu.:34.40 | Max. :44.90 |
| LBXRDW | Min. :11.70 | 1st Qu.:13.00 | Median :13.40 | Mean :13.66 | 3rd Qu.:14.00 | Max. :26.10 |
| LBXPLTSI | Min. : 18 | 1st Qu.:196 | Median :229 | Mean :236 | 3rd Qu.:271 | Max. :633 |
| LBXMPSI | Min. : 6.200 | 1st Qu.: 7.800 | Median : 8.400 | Mean : 8.454 | 3rd Qu.: 9.000 | Max. :13.000 |
| PHAFSTHR.x | Min. : 0 | 1st Qu.: 2 | Median : 7 | Mean : 7 | 3rd Qu.:11 | Max. :35 |
| PHAFSTMN.x | Min. : 0.00 | 1st Qu.:14.00 | Median :30.00 | Mean :29.39 | 3rd Qu.:44.00 | Max. :59.00 |
| LBDHDD | Min. : 10.00 | 1st Qu.: 42.00 | Median : 50.00 | Mean : 52.71 | 3rd Qu.: 62.00 | Max. :173.00 |
| LBDHDDSI | Min. :0.260 | 1st Qu.:1.090 | Median :1.290 | Mean :1.363 | 3rd Qu.:1.600 | Max. :4.470 |
| LBXTC | Min. : 82.0 | 1st Qu.:162.0 | Median :187.0 | Mean :190.5 | 3rd Qu.:214.0 | Max. :813.0 |
| LBDTCSI | Min. : 2.120 | 1st Qu.: 4.190 | Median : 4.840 | Mean : 4.926 | 3rd Qu.: 5.530 | Max. :21.020 |
| URXVOL1 | Min. : 2.0 | 1st Qu.: 46.0 | Median : 88.0 | Mean :110.8 | 3rd Qu.:153.8 | Max. :552.0 |
| depressionTot | Min. :0.000 | 1st Qu.:0.000 | Median :1.099 | Mean :1.022 | 3rd Qu.:1.609 | Max. :3.332 |
| RIAGENDR | Min. :1.000 | 1st Qu.:1.000 | Median :2.000 | Mean :1.509 | 3rd Qu.:2.000 | Max. :2.000 |
| Diab | Min. :0.0000 | 1st Qu.:0.0000 | Median :0.0000 | Mean :0.1493 | 3rd Qu.:0.0000 | Max. :1.0000 |
varlist <- colnames(trainingset)
varlist <- varlist[varlist != outcome]
The heat map of the data
par(op)
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
cormat <- cor(testingset[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
pander::pander(max(abs(cormat)))
1
par(op)
ILAA bootstrapped training and testing sets
trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE)
fast | LM | URXUMA URXUCR.x PHAFSTMN.x WTINT2YR WTMEC2YR PEASCTM1 BPXPLS BPXML1 BMXWT 0.86885246 0.85245902 0.08196721 0.06557377 0.52459016 0.59016393
Included: 61 , Uni p: 0.002459016 , Base Size: 3 , Rcrit: 0.05038592
1 <R=0.835,thr=0.950>, Top: 7< 1 >Fa= 7,<|><>Tot Used: 18 , Added: 11 , Zero Std: 0 , Max Cor: 1.000
2 <R=0.774,thr=0.950>, Top: 3< 1 >Fa= 8,<|><>Tot Used: 19 , Added: 3 , Zero Std: 0 , Max Cor: 0.938
3 <R=0.740,thr=0.900>, Top: 4< 1 >Fa= 11,<|><>Tot Used: 26 , Added: 4 , Zero Std: 0 , Max Cor: 0.895
4 <R=0.700,thr=0.800>, Top: 4< 1 >Fa= 14,<|><>Tot Used: 34 , Added: 5 , Zero Std: 0 , Max Cor: 0.973
5 <R=0.681,thr=0.950>, Top: 2< 1 >Fa= 16,<|><>Tot Used: 36 , Added: 2 , Zero Std: 0 , Max Cor: 0.789
6 <R=0.651,thr=0.700>, Top: 6< 2 >Fa= 18,<|><>Tot Used: 40 , Added: 7 , Zero Std: 0 , Max Cor: 0.914
7 <R=0.625,thr=0.900>, Top: 1< 1 >Fa= 19,<|><>Tot Used: 40 , Added: 1 , Zero Std: 0 , Max Cor: 0.770
8 <R=0.607,thr=0.700>, Top: 4< 1 >Fa= 19,<|><>Tot Used: 40 , Added: 3 , Zero Std: 0 , Max Cor: 0.697
9 <R=0.591,thr=0.600>, Top: 8< 2 >Fa= 24,<|><>Tot Used: 49 , Added: 8 , Zero Std: 0 , Max Cor: 0.631
10 <R=0.545,thr=0.600>, Top: 1< 1 >Fa= 24,<|><>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.892
11 <R=0.571,thr=0.800>, Top: 1< 1 >Fa= 24,<|><>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.748
12 <R=0.556,thr=0.700>, Top: 1< 1 >Fa= 24,<|><>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.722
13 <R=0.554,thr=0.700>, Top: 1< 1 >Fa= 24,<|><>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.839
14 <R=0.566,thr=0.800>, Top: 1< 1 >Fa= 24,<|><>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.600
15 <R=0.540,thr=0.500>, Top: 8< 3 >Fa= 25,<|><>Tot Used: 51 , Added: 16 , Zero Std: 0 , Max Cor: 0.640
16 <R=0.524,thr=0.600>, Top: 2< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 2 , Zero Std: 0 , Max Cor: 0.672
17 <R=0.508,thr=0.600>, Top: 1< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 1 , Zero Std: 0 , Max Cor: 0.566
18 <R=0.482,thr=0.500>, Top: 3< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 3 , Zero Std: 0 , Max Cor: 0.598
19 <R=0.483,thr=0.500>, Top: 1< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 1 , Zero Std: 0 , Max Cor: 0.673
20 <R=0.491,thr=0.600>, Top: 1< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 1 , Zero Std: 0 , Max Cor: 0.633
21 <R=0.484,thr=0.600>, Top: 1< 1 >Fa= 25,<|><>Tot Used: 51 , Added: 1 , Zero Std: 0 , Max Cor: 0.484
22 <R=0.454,thr=0.400>, Top: 7< 1 >Fa= 26,<|><>Tot Used: 53 , Added: 6 , Zero Std: 0 , Max Cor: 0.477
23 <R=0.459,thr=0.400>, Top: 1< 1 >Fa= 27,<|><>Tot Used: 53 , Added: 1 , Zero Std: 0 , Max Cor: 0.408
24 <R=0.408,thr=0.400>, Top: 1< 1 >Fa= 27,<|><>Tot Used: 53 , Added: 1 , Zero Std: 0 , Max Cor: 0.560
25 <R=0.560,thr=0.500>, Top: 1< 1 >Fa= 28,<|><>Tot Used: 53 , Added: 1 , Zero Std: 0 , Max Cor: 0.456
26 <R=0.456,thr=0.400>, Top: 1< 1 >Fa= 28,<|><>Tot Used: 53 , Added: 1 , Zero Std: 0 , Max Cor: 0.396
27 <R=0.396,thr=0.400>
[ 27 ], 0.4042229 Decor Dimension: 53 Nused: 53 . Cor to Base: 34 , ABase: 61 , Outcome Base: 0
#trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE,bootstrap=30)
testage_DE <- predictDecorrelate(trainage_DE,testingset)
Generating the formulas
theLaFormulas <- getLatentCoefficients(trainage_DE)
theCharformulas <- attr(theLaFormulas,"LatentCharFormulas")
pander::pander(as.matrix(theCharformulas))
| La_WTMEC2YR | - (1.025)WTINT2YR + WTMEC2YR |
| La_BMXWT | + BMXWT - (2.759)BMXBMI - (0.325)MGDCGSZ |
| La_BMXHT | - (0.969)BMXWT + BMXHT + (2.674)BMXBMI |
| La_BMXLEG | - (0.142)BMXWT + (0.393)BMXBMI + BMXLEG - (0.206)BMXARML - (3.54e-03)MGDCGSZ |
| La_BMXARML | - (0.212)BMXWT + (0.584)BMXBMI + BMXARML - (5.26e-03)MGDCGSZ |
| La_BMXARMC | - (0.666)BMXBMI + BMXARMC - (0.066)MGDCGSZ |
| La_BMXWAIST | - (0.494)BMXWT - (0.843)BMXBMI + BMXWAIST + (0.161)MGDCGSZ |
| La_MGXH1T1 | + MGXH1T1 - (0.439)MGDCGSZ |
| La_MGXH2T1 | + MGXH2T1 + (0.614)MGXH1T3 - (0.764)MGDCGSZ |
| La_MGXH1T2 | + MGXH1T2 - (0.535)MGXH1T3 - (0.211)MGDCGSZ |
| La_MGXH2T2 | + MGXH2T2 + (0.599)MGXH1T3 - (0.780)MGDCGSZ |
| La_MGXH1T3 | + MGXH1T3 - (0.486)MGDCGSZ |
| La_MGXH2T3 | + (0.571)MGXH1T3 + MGXH2T3 - (0.771)MGDCGSZ |
| La_OHX04TC | + OHX04TC - (0.620)OHX13TC |
| La_OHX11TC | - (0.795)OHX06TC + OHX11TC |
| La_OHX13TC | - (0.708)OHX06TC + OHX13TC |
| La_OHX20TC | + OHX20TC - (0.605)OHX29TC |
| La_OHX25TC | - (0.489)OHX06TC + OHX25TC |
| La_OHX29TC | - (0.616)OHX06TC + OHX29TC |
| La_bpresSYM | - (0.911)BPXML1 + bpresSYM |
| La_URXUMS | - URXUMA + URXUMS |
| La_URXCRS | - (88.400)URXUCR.x + URXCRS |
| La_URDACT | - (0.858)URXUMA + URDACT |
| La_LBXWBCSI | + LBXWBCSI + (0.126)LBXNEPCT - (1.627)LBDNENO |
| La_LBXLYPCT | + LBXLYPCT + (0.849)LBXNEPCT |
| La_LBXMOPCT | + (0.989)LBXLYPCT + LBXMOPCT + (0.987)LBXNEPCT + (0.991)LBXEOPCT + (0.962)LBXBAPCT |
| La_LBXEOPCT | + (0.443)LBXLYPCT + (0.376)LBXNEPCT + LBXEOPCT |
| La_LBDLYMNO | - (0.947)LBXWBCSI - (0.074)LBXLYPCT - (0.075)LBXNEPCT - (0.061)LBXEOPCT + LBDLYMNO + (1.048)LBDNENO + (0.850)LBDEONO |
| La_LBDMONO | - (0.587)LBXWBCSI + (0.013)LBXLYPCT + (0.011)LBXNEPCT + (0.018)LBXEOPCT + (0.577)LBDLYMNO + LBDMONO + (0.577)LBDNENO + (0.490)LBDEONO |
| La_LBDNENO | - (0.128)LBXNEPCT + LBDNENO |
| La_LBDEONO | + (4.59e-03)LBXNEPCT - (0.072)LBXEOPCT - (0.036)LBDNENO + LBDEONO |
| La_LBDBANO | + (2.10e-03)LBXNEPCT - (0.100)LBXBAPCT - (0.016)LBDNENO + LBDBANO |
| La_LBXRBCSI | + LBXRBCSI + (0.333)LBXHGB - (0.222)LBXHCT + (0.103)LBXMCVSI - (0.151)LBXMCHSI |
| La_LBXHGB | + LBXHGB - (0.354)LBXHCT |
| La_LBXHCT | - (0.083)MGDCGSZ + LBXHCT |
| La_LBXMCVSI | + (5.969)LBXHGB - (2.022)LBXHCT + LBXMCVSI - (2.868)LBXMCHSI |
| La_LBXMCHSI | - (3.528)LBXHGB + (1.248)LBXHCT + LBXMCHSI |
| La_LBXMC | + (1.438)LBXRBCSI - (0.894)LBXHGB + (0.135)LBXHCT + (0.309)LBXMCVSI - (0.709)LBXMCHSI + LBXMC |
| La_LBXMPSI | + (6.32e-03)LBXPLTSI + LBXMPSI |
| La_LBDHDDSI | - (0.026)LBDHDD + LBDHDDSI |
| La_LBDTCSI | - (0.026)LBXTC + LBDTCSI |
Displaying the features associations
par(op)
transform <- attr(trainage_DE,"UPLTM") != 0
colnames(transform) <- str_remove_all(colnames(transform),"La_")
transform <- abs(transform*cor(trainingset[,rownames(transform)])) # The weights are proportional to the observed correlation
VertexSize <- attr(trainage_DE,"fscore") # The size depends on the variable independence relevance (fscore)
names(VertexSize) <- str_remove_all(names(VertexSize),"La_")
VertexSize <- 0.5+9.5*(VertexSize-min(VertexSize))/(max(VertexSize)-min(VertexSize)) # Normalization
VertexSize <- VertexSize[colnames(transform)]
gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
gr$layout <- layout_with_fr
fc <- cluster_optimal(gr)
# fc <- cluster_walktrap (gr,steps=50)
plot(fc, gr,
edge.width = 2*E(gr)$weight,
vertex.size=VertexSize,
edge.arrow.size=0.5,
edge.arrow.width=0.75,
vertex.label.color="purple",
# vertex.label.cex=0.85,
# vertex.label.dist=1.2,
vertex.label.cex=(0.70 + 0.025*VertexSize),
vertex.label.dist=(0.5 + 0.05*VertexSize),
main="Feature Association")
par(op)
varratios <- attr(trainage_DE,"VarRatio")
names(varratios) <- str_remove_all(names(varratios),"La_")
fscores <- attr(trainage_DE,"fscore")
names(fscores) <- str_remove_all(names(fscores),"La_")
clustable <- as.data.frame(cbind(Variable=fc$names,
Formula=as.character(theCharformulas[paste("La_",fc$names,sep="")]),
Cluster=fc$membership,
ResidualVariance=round(varratios[fc$names],3),
Fscore=round(fscores[fc$names],3)
)
)
rownames(clustable) <- str_replace_all(rownames(clustable),"__","_")
clustable$Variable <- NULL
clustable$Cluster <- as.integer(clustable$Cluster)
clustable$ResidualVariance <- as.numeric(clustable$ResidualVariance)
clustable$Fscore <- as.numeric(clustable$Fscore)
clustable <- clustable[order(-clustable$Fscore),]
clustable <- clustable[order(-clustable$ResidualVariance),]
clustable <- clustable[order(clustable$Cluster),]
pander::pander(as.matrix(clustable))
| Formula | Cluster | ResidualVariance | Fscore | |
|---|---|---|---|---|
| WTINT2YR | NA | 1 | 1.000 | 1 |
| WTMEC2YR | - (1.025)WTINT2YR + WTMEC2YR | 1 | 0.002 | -1 |
| BPXML1 | NA | 2 | 1.000 | 1 |
| bpresSYM | - (0.911)BPXML1 + bpresSYM | 2 | 0.215 | -1 |
| BMXBMI | NA | 3 | 1.000 | 3 |
| BMXLEG | - (0.142)BMXWT + (0.393)BMXBMI + BMXLEG - (0.206)BMXARML - (3.54e-03)MGDCGSZ | 3 | 0.449 | -2 |
| BMXARML | - (0.212)BMXWT + (0.584)BMXBMI + BMXARML - (5.26e-03)MGDCGSZ | 3 | 0.397 | 0 |
| BMXARMC | - (0.666)BMXBMI + BMXARMC - (0.066)MGDCGSZ | 3 | 0.148 | -2 |
| BMXWAIST | - (0.494)BMXWT - (0.843)BMXBMI + BMXWAIST + (0.161)MGDCGSZ | 3 | 0.127 | -2 |
| BMXWT | + BMXWT - (2.759)BMXBMI - (0.325)MGDCGSZ | 3 | 0.113 | 1 |
| BMXHT | - (0.969)BMXWT + BMXHT + (2.674)BMXBMI | 3 | 0.053 | -1 |
| MGDCGSZ | NA | 4 | 1.000 | 10 |
| MGXH1T1 | + MGXH1T1 - (0.439)MGDCGSZ | 4 | 0.119 | -1 |
| MGXH2T1 | + MGXH2T1 + (0.614)MGXH1T3 - (0.764)MGDCGSZ | 4 | 0.065 | -2 |
| MGXH1T3 | + MGXH1T3 - (0.486)MGDCGSZ | 4 | 0.058 | 3 |
| MGXH1T2 | + MGXH1T2 - (0.535)MGXH1T3 - (0.211)MGDCGSZ | 4 | 0.057 | -2 |
| MGXH2T2 | + MGXH2T2 + (0.599)MGXH1T3 - (0.780)MGDCGSZ | 4 | 0.042 | -2 |
| MGXH2T3 | + (0.571)MGXH1T3 + MGXH2T3 - (0.771)MGDCGSZ | 4 | 0.042 | -2 |
| OHX06TC | NA | 5 | 1.000 | 4 |
| OHX29TC | - (0.616)OHX06TC + OHX29TC | 5 | 0.688 | 0 |
| OHX25TC | - (0.489)OHX06TC + OHX25TC | 5 | 0.673 | -1 |
| OHX13TC | - (0.708)OHX06TC + OHX13TC | 5 | 0.672 | 0 |
| OHX20TC | + OHX20TC - (0.605)OHX29TC | 5 | 0.624 | -1 |
| OHX04TC | + OHX04TC - (0.620)OHX13TC | 5 | 0.606 | -1 |
| OHX11TC | - (0.795)OHX06TC + OHX11TC | 5 | 0.392 | -1 |
| URXUMA | NA | 6 | 1.000 | 3 |
| URDACT | - (0.858)URXUMA + URDACT | 6 | 0.379 | -1 |
| URXUMS | - URXUMA + URXUMS | 6 | 0.000 | -2 |
| URXUCR.x | NA | 7 | 1.000 | 2 |
| URXCRS | - (88.400)URXUCR.x + URXCRS | 7 | 0.000 | -2 |
| LBXNEPCT | NA | 8 | 1.000 | 6 |
| LBXBAPCT | NA | 8 | 1.000 | 3 |
| LBXEOPCT | + (0.443)LBXLYPCT + (0.376)LBXNEPCT + LBXEOPCT | 8 | 0.565 | 4 |
| LBDNENO | - (0.128)LBXNEPCT + LBDNENO | 8 | 0.530 | 5 |
| LBDBANO | + (2.10e-03)LBXNEPCT - (0.100)LBXBAPCT - (0.016)LBDNENO + LBDBANO | 8 | 0.405 | -2 |
| LBDEONO | + (4.59e-03)LBXNEPCT - (0.072)LBXEOPCT - (0.036)LBDNENO + LBDEONO | 8 | 0.135 | -1 |
| LBXWBCSI | + LBXWBCSI + (0.126)LBXNEPCT - (1.627)LBDNENO | 8 | 0.131 | 0 |
| LBXLYPCT | + LBXLYPCT + (0.849)LBXNEPCT | 8 | 0.129 | 4 |
| LBDMONO | - (0.587)LBXWBCSI + (0.013)LBXLYPCT + (0.011)LBXNEPCT + (0.018)LBXEOPCT + (0.577)LBDLYMNO + LBDMONO + (0.577)LBDNENO + (0.490)LBDEONO | 8 | 0.113 | -5 |
| LBDLYMNO | - (0.947)LBXWBCSI - (0.074)LBXLYPCT - (0.075)LBXNEPCT - (0.061)LBXEOPCT + LBDLYMNO + (1.048)LBDNENO + (0.850)LBDEONO | 8 | 0.006 | -5 |
| LBXMOPCT | + (0.989)LBXLYPCT + LBXMOPCT + (0.987)LBXNEPCT + (0.991)LBXEOPCT + (0.962)LBXBAPCT | 8 | 0.002 | -9 |
| LBXHCT | - (0.083)MGDCGSZ + LBXHCT | 9 | 0.797 | 3 |
| LBXMCHSI | - (3.528)LBXHGB + (1.248)LBXHCT + LBXMCHSI | 9 | 0.658 | 3 |
| LBXHGB | + LBXHGB - (0.354)LBXHCT | 9 | 0.068 | 4 |
| LBXMC | + (1.438)LBXRBCSI - (0.894)LBXHGB + (0.135)LBXHCT + (0.309)LBXMCVSI - (0.709)LBXMCHSI + LBXMC | 9 | 0.016 | -3 |
| LBXRBCSI | + LBXRBCSI + (0.333)LBXHGB - (0.222)LBXHCT + (0.103)LBXMCVSI - (0.151)LBXMCHSI | 9 | 0.009 | -4 |
| LBXMCVSI | + (5.969)LBXHGB - (2.022)LBXHCT + LBXMCVSI - (2.868)LBXMCHSI | 9 | 0.007 | -4 |
| LBXPLTSI | NA | 10 | 1.000 | 1 |
| LBXMPSI | + (6.32e-03)LBXPLTSI + LBXMPSI | 10 | 0.834 | -1 |
| LBDHDD | NA | 11 | 1.000 | 1 |
| LBDHDDSI | - (0.026)LBDHDD + LBDHDDSI | 11 | 0.000 | -1 |
| LBXTC | NA | 12 | 1.000 | 1 |
| LBDTCSI | - (0.026)LBXTC + LBDTCSI | 12 | 0.000 | -1 |
The heat map of the ILAA transformed data
par(op)
varlistDe <- colnames(trainage_DE)
varlistDe <- varlistDe[varlistDe != outcome]
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
# Training
cormat <- cor(trainage_DE[,varlistDe],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Training: After ILAA Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
pander::pander(max(abs(cormat)))
0.726
par(op)
# Testing
cormat <- cor(testage_DE[,varlistDe],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Testing: After ILAA Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
pander::pander(max(abs(cormat)))
0.895
par(op)
outcomeModel <- LASSO_1SE(formula(paste(outcome,"~.")),trainingset);
predOutcome <- predict(outcomeModel,testingset)
pander::pander(as.matrix(outcomeModel$coef))
| (Intercept) | 6.96e+00 |
| WTMEC2YR | -2.93e-06 |
| PEASCTM1 | 1.23e-03 |
| BPXPLS | -1.54e-01 |
| BPXML1 | 2.26e-01 |
| BMXWT | -1.25e-01 |
| BMXBMI | -7.52e-01 |
| BMXLEG | -8.79e-01 |
| BMXARML | 7.77e-01 |
| BMXARMC | 7.49e-02 |
| BMXWAIST | 5.94e-01 |
| MGXH1T1 | -3.79e-02 |
| MGXH1T2 | 1.16e-01 |
| MGXH2T2 | 4.81e-02 |
| MGXH1T3 | 8.50e-02 |
| MGXH2T3 | 1.71e-01 |
| MGDCGSZ | -4.16e-01 |
| OHX04TC | 1.19e+00 |
| OHX06TC | 6.25e-01 |
| OHX10TC | 3.47e-01 |
| OHX11TC | -3.77e-01 |
| OHX13TC | 1.17e+00 |
| OHX20TC | 1.39e+00 |
| OHX25TC | 3.99e-01 |
| OHX29TC | 4.90e-01 |
| bpresDIM | -4.88e-02 |
| bpresSYM | 5.32e-02 |
| URXUMA | 1.17e-04 |
| URXUMS | 2.76e-19 |
| URXUCR.x | -1.63e-02 |
| URDACT | -2.50e-03 |
| LBXLYPCT | -2.23e-01 |
| LBXMOPCT | 1.36e-01 |
| LBXEOPCT | -1.54e-01 |
| LBXBAPCT | 2.98e+00 |
| LBDLYMNO | 4.23e-01 |
| LBDMONO | 1.74e+00 |
| LBDNENO | -1.16e+00 |
| LBDEONO | 2.23e+00 |
| LBXHGB | -5.96e-01 |
| LBXMCVSI | 3.87e-01 |
| LBXMC | -5.36e-01 |
| LBXRDW | 9.77e-01 |
| LBXPLTSI | -2.59e-02 |
| LBXMPSI | -1.06e+00 |
| PHAFSTHR.x | -3.46e-02 |
| PHAFSTMN.x | -1.09e-04 |
| LBDHDDSI | 3.36e+00 |
| LBXTC | 2.48e-02 |
| LBDTCSI | 4.99e-02 |
| URXVOL1 | -1.22e-02 |
| depressionTot | -7.23e-01 |
| RIAGENDR | -6.57e+00 |
| Diab | -1.36e+00 |
rawunittvalues <- apply(as.matrix(testingset[,names(outcomeModel$coef)[-1]]),2,tvals,testingset[,outcome])
names(rawunittvalues) <- names(outcomeModel$coef)[-1]
deunittvalues <- apply(testage_DE[,names(outcomeModel_DE$coef)[-1]],2,tvals,testingset[,outcome])
psig <- 0.1/(ncol(testingset)-1)
lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(outcomeModel$coef)[-1])])
try(vifx <-vif(lm(paste(outcome,"~."),testingset[,c(outcome,names(outcomeModel$coef)[-1])])))
Error in vif.default(lm(paste(outcome, “~.”), testingset[, c(outcome, : there are aliased coefficients in the model
sm <- summary(lmod)
if (length(lmod$coef)>10)
{
sm$coefficients[1,4] <- 1.0
gcoef <- lmod$coef[sm$coefficients[,4]<psig]
lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(gcoef))])
try(vifx <-vif(lm(paste(outcome,"~."),testingset[,c(outcome,names(gcoef))])))
}
sm <- summary(lmod)
smcoef <- as.data.frame(sm$coefficients)
smcoef <- smcoef[order(-abs(smcoef[,3])),]
smcoef$Uni_t_values <- rawunittvalues[rownames(smcoef)]
if (!inherits(vif,"try-error")) smcoef$vif <-vifx[rownames(smcoef)]
smcoef <- smcoef[!is.na(smcoef$Uni_t_values),]
if (nrow(smcoef)>10) smcoef <- smcoef[smcoef[,4]<psig,]
pander::pander(smcoef)
| Estimate | Std. Error | t value | Pr(>|t|) | Uni_t_values | vif | |
|---|---|---|---|---|---|---|
| BPXML1 | 0.4019 | 0.02524 | 15.93 | 3.45e-51 | 17.362 | 1.09 |
| BMXARML | 1.6152 | 0.22316 | 7.24 | 8.90e-13 | -0.334 | 2.52 |
| BMXLEG | -1.0315 | 0.16499 | -6.25 | 5.95e-10 | -10.171 | 2.48 |
| MGDCGSZ | -0.4653 | 0.07660 | -6.07 | 1.75e-09 | -10.525 | 17.56 |
| BPXPLS | -0.2167 | 0.03590 | -6.04 | 2.19e-09 | -5.817 | 1.09 |
| LBXEOPCT | 2.0961 | 0.41926 | 5.00 | 6.75e-07 | 3.605 | 5.76 |
| LBXPLTSI | -0.0351 | 0.00703 | -5.00 | 6.84e-07 | -5.049 | 1.17 |
| LBDEONO | -18.7655 | 4.79578 | -3.91 | 9.72e-05 | 1.585 | 5.79 |
| BMXWAIST | 0.1054 | 0.02972 | 3.55 | 4.07e-04 | 5.222 | 1.44 |
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
| R2 | adj_R2 |
|---|---|
| 0.451 | 0.444 |
pander::pander(c(numvar=nrow(smcoef)))
| numvar |
|---|
| 9 |
lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(outcomeModel_DE$coef)[-1])])
try(vifx <-vif(lm(paste(outcome,"~."),testage_DE[,c(outcome,names(outcomeModel_DE$coef)[-1])])))
sm <- summary(lmod_DE)
if (length(lmod_DE$coef)>10)
{
sm$coefficients[1,4] <- 1.0
gcoef <- lmod_DE$coef[sm$coefficients[,4]<psig]
lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(gcoef))])
try(vifx <-vif(lm(paste(outcome,"~."),testage_DE[,c(outcome,names(gcoef))])))
}
sm <- summary(lmod_DE)
lacoef <- as.data.frame(sm$coefficients)
lacoef <- lacoef[order(-abs(lacoef[,3])),]
lacoef$Uni_t_values <- deunittvalues[rownames(lacoef)]
if (!inherits(vifx,"try-error")) lacoef$vif <-vifx[rownames(lacoef)]
lacoef <- lacoef[!is.na(lacoef$Uni_t_values),]
if (nrow(lacoef)>10) lacoef <- lacoef[lacoef[,4]<psig,]
lacoef$formula <- theCharformulas[rownames(lacoef)]
lacoef$VarRatio <- varratios[str_remove_all(rownames(lacoef),"La_")]
pander::pander(lacoef)
| Estimate | Std. Error | t value | Pr(>|t|) | Uni_t_values | vif | formula | VarRatio | |
|---|---|---|---|---|---|---|---|---|
| MGDCGSZ | -0.38256 | 0.025366 | -15.08 | 1.49e-46 | -10.525 | 2.52 | NA | 1.00000 |
| BPXML1 | 0.31352 | 0.022827 | 13.73 | 1.62e-39 | 17.362 | 1.16 | NA | 1.00000 |
| La_BMXWAIST | 0.61538 | 0.066152 | 9.30 | 8.10e-20 | 10.656 | 1.29 | - (0.494)BMXWT - (0.843)BMXBMI + BMXWAIST + (0.161)MGDCGSZ | 0.12686 |
| RIAGENDR | -8.59284 | 1.188495 | -7.23 | 9.45e-13 | -0.461 | 2.88 | NA | 1.00000 |
| La_BMXLEG | -0.99983 | 0.158900 | -6.29 | 4.64e-10 | -12.128 | 1.38 | - (0.142)BMXWT + (0.393)BMXBMI + BMXLEG - (0.206)BMXARML - (3.54e-03)MGDCGSZ | 0.44888 |
| La_LBXLYPCT | -0.73278 | 0.120970 | -6.06 | 1.94e-09 | -8.066 | 1.12 | + LBXLYPCT + (0.849)LBXNEPCT | 0.12936 |
| La_LBXHCT | -0.61927 | 0.110855 | -5.59 | 2.98e-08 | 1.480 | 1.32 | - (0.083)MGDCGSZ + LBXHCT | 0.79676 |
| La_LBXMCHSI | 0.93501 | 0.180333 | 5.18 | 2.60e-07 | 6.218 | 1.20 | - (3.528)LBXHGB + (1.248)LBXHCT + LBXMCHSI | 0.65840 |
| BPXPLS | -0.15626 | 0.031602 | -4.94 | 8.92e-07 | -5.817 | 1.11 | NA | 1.00000 |
| La_BMXARML | 1.02170 | 0.212415 | 4.81 | 1.74e-06 | 5.709 | 1.19 | - (0.212)BMXWT + (0.584)BMXBMI + BMXARML - (5.26e-03)MGDCGSZ | 0.39663 |
| La_LBDNENO | -1.46350 | 0.317285 | -4.61 | 4.48e-06 | -5.685 | 1.24 | - (0.128)LBXNEPCT + LBDNENO | 0.53015 |
| La_OHX20TC | 2.53157 | 0.549632 | 4.61 | 4.62e-06 | 6.826 | 1.04 | + OHX20TC - (0.605)OHX29TC | 0.62438 |
| La_WTMEC2YR | -0.00102 | 0.000248 | -4.13 | 3.94e-05 | -4.503 | 1.02 | - (1.025)WTINT2YR + WTMEC2YR | 0.00232 |
| La_LBXMPSI | -1.48586 | 0.416317 | -3.57 | 3.75e-04 | -4.351 | 1.07 | + (6.32e-03)LBXPLTSI + LBXMPSI | 0.83438 |
| LBXTC | 0.03234 | 0.009192 | 3.52 | 4.54e-04 | 3.650 | 1.11 | NA | 1.00000 |
| LBXPLTSI | -0.02216 | 0.006418 | -3.45 | 5.77e-04 | -5.049 | 1.28 | NA | 1.00000 |
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
| R2 | adj_R2 |
|---|---|
| 0.582 | 0.575 |
pander::pander(c(numvar=nrow(lacoef)))
| numvar |
|---|
| 16 |
xvals <-c(min(c(deunittvalues,rawunittvalues))-3,max(c(deunittvalues,rawunittvalues))+3)
par(mfrow=c(1,2),cex=0.5)
plot(smcoef[,c(3,5)],
main="Raw: Univariate t-values vs regression t-values",
xlim=xvals,
ylim=xvals
)
lmtvals <- lm(smcoef[,5]~smcoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))
plot(lacoef[-1,c(3,5)],
main="ILAA: Univariate t-values vs regression t-values",
xlim=xvals,
ylim=xvals
)
lmtvals <- lm(lacoef[,5]~lacoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))
#pander::pander(summary(lmtvals))
pander::pander(cor.test(smcoef[,3],smcoef[,5]))
| Test statistic | df | P value | Alternative hypothesis | cor |
|---|---|---|---|---|
| 5.34 | 7 | 0.00108 * * | two.sided | 0.896 |
pander::pander(cor.test(lacoef[,3],lacoef[,5]))
| Test statistic | df | P value | Alternative hypothesis | cor |
|---|---|---|---|---|
| 8.36 | 14 | 8.22e-07 * * * | two.sided | 0.913 |
par(op)
par(mfrow=c(1,3),cex=0.5)
plot(lmod$fitted.values,predOutcome,main="Raw: lm train predict vs. test predict",xlab="Train",ylab="Test")
plot(lmod_DE$fitted.values,predOutcome_DE,main="ILAA: lm train predict vs. test predict",xlab="Train",ylab="Test")
plot(predOutcome,predOutcome_DE,xlab="Raw Predicted",ylab="ILAA Predicted",main="Raw vs. ILAA")
par(op)
par(op)
corresults <- CV_IDeA(dataframe,outcome,loops=loops)
..Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model ..Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model Error in vif.default(lm(formula(frm), testage_DE)) : there are aliased coefficients in the model . .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model Error in vif.default(lm(formula(frm), testage_DE)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model …Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model . Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model Error in vif.default(lm(formula(frm), testage_DE)) : there are aliased coefficients in the model ..Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model Error in vif.default(lm(formula(frm), testage_DE)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model Error in vif.default(lm(formula(frm), testage_DE)) : there are aliased coefficients in the model .Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model .. Error in vif.default(lm(formula(frm), testingset)) : there are aliased coefficients in the model
mintvals <- min(c(min(corresults$rawtValues),min(corresults$detValues)))
maxvals <- max(c(max(corresults$rawtValues),max(corresults$detValues)))
xvals <- c(mintvals,maxvals)
vioplot(list(raw=corresults$testRawCorrelations,ILAA=corresults$testDeCorrelations),
ylab="Pearson Correlation",
main="Test Correlations")
pander::pander(t.test(corresults$testDeCorrelations,corresults$testRawCorrelations,paired=TRUE))
| Test statistic | df | P value | Alternative hypothesis | mean difference |
|---|---|---|---|---|
| 3.92 | 29 | 0.000501 * * * | two.sided | 0.00433 |
sylim <- c(1,min(c(20,max(corresults$VIFRaw))))
vioplot(list(raw=corresults$VIFRaw,ILAA=corresults$VIFDe),
ylab="VIF",
ylim=sylim,
main="Test VIF")
pander::pander(summary(cbind(raw=corresults$VIFRaw,ILAA=corresults$VIFDe)))
| raw | ILAA |
|---|---|
| Min. : 88.16 | Min. :3.160 |
| 1st Qu.: 98.44 | 1st Qu.:3.757 |
| Median : 99.86 | Median :3.853 |
| Mean : 98.02 | Mean :4.191 |
| 3rd Qu.:100.60 | 3rd Qu.:3.967 |
| Max. :102.27 | Max. :9.434 |
summary(corresults$VIFRaw)
Min. 1st Qu. Median Mean 3rd Qu. Max. 88.16 98.44 99.86 98.02 100.60 102.27
par(op)
par(mfrow=c(1,2),cex=0.5)
plot(corresults$rawtValues,
main="Raw: Univariate t-values vs Model t-values",
xlab="Univariate",
ylab="Model",
xlim=xvals,
ylim=xvals)
lmtvals <- lm(Model~.,corresults$rawtValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))
pander::pander(summary(lmtvals))
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | -0.386 | 0.07064 | -5.47 | 5.41e-08 |
| Uni | 0.196 | 0.00961 | 20.40 | 2.16e-81 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 1426 | 2.6 | 0.226 | 0.226 |
plot(corresults$detValues,
main="ILAA: Univariate t-values vs Model t-values",
xlab="Univariate",
ylab="Model",
xlim=xvals,
ylim=xvals)
lmtvals <- lm(Model~.,corresults$detValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))
pander::pander(summary(lmtvals))
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | -0.879 | 0.05257 | -16.7 | 2.96e-57 |
| Uni | 0.602 | 0.00908 | 66.3 | 0.00e+00 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 1371 | 1.88 | 0.763 | 0.763 |
toPCA <- sapply(apply(dataframe,2,unique),length) >= 5 & colnames(dataframe) != outcome
pc <- prcomp(dataframe[,toPCA],center = TRUE,scale. = TRUE,tol=0.01) #principal components
if (ncol(dataframe)<20)
{
pander::pander(as.data.frame(pc$rotation),caption="PCA")
}
rotstd <- log10(abs(100*pc$rotation)+1.0)
gplots::heatmap.2(rotstd,
trace = "none",
dendrogram="none",
breaks=c(0,0.5,1,2,3),
# scale="row",
mar = c(5,5),
col=rainbow(4),
main = "PCA Rotation",
cexRow = cexheat,
cexCol = cexheat,
Rowv=FALSE,
Colv=FALSE,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="log(|100Rot|+1)",
xlab="Output Feature", ylab="Input Feature")
efa <- try(fa(dataframe[,toPCA],ncol(pc$rotation),rotate="varimax",warnings=FALSE)) # EFA analysis
if (!inherits(efa,"try-error"))
{
if (ncol(dataframe)<20)
{
pander::pander(as.data.frame(efa$weights),caption="EFA")
}
rotstd <- log10(abs(100*efa$weights)+1.0)
gplots::heatmap.2(rotstd,
trace = "none",
dendrogram="none",
breaks=c(0,0.5,1,2,3),
# scale="row",
mar = c(5,5),
col=rainbow(4),
main = "EFA weights",
cexRow = cexheat,
cexCol = cexheat,
Rowv=FALSE,
Colv=FALSE,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="log(|100W|+1)",
xlab="Output Feature", ylab="Input Feature")
}
plm <- plsr(formula=formula(paste(outcome,"~.")),data=dataframe,scale =TRUE)
if (ncol(dataframe)<20)
{
lds <- plm$loadings
lds2 <- matrix(as.numeric(lds),nrow=nrow(lds),ncol=ncol(lds))
colnames(lds2) <- colnames(lds)
rownames(lds2) <- rownames(lds)
pander::pander(lds2,caption="PLS")
}
loadadings <- log10(abs(100*plm$loadings) + 1.0)
gplots::heatmap.2(loadadings,
breaks=c(0,0.5,1,2,3),
trace = "none",
dendrogram="none",
# scale="row",
mar = c(5,5),
col=rainbow(4),
main = "PLS Loadings",
cexRow = cexheat,
cexCol = cexheat,
Rowv=FALSE,
Colv=FALSE,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="log(|100Beta|+1)",
xlab="Output Feature", ylab="Input Feature")
ERTmod <- ILAA(dataframe,Outcome = outcome,thr=thro)
ERT <- log10(abs(100*attr(ERTmod,"UPLTM")) + 1);
gplots::heatmap.2(ERT,
trace = "none",
breaks=c(0,0.5,1,2,3),
mar = c(5,5),
col=rainbow(4),
main = "ERT Rotation",
cexRow = cexheat,
cexCol = cexheat,
dendrogram="none",
Rowv=FALSE,
Colv=FALSE,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="log(|100Beta|+1)",
xlab="Output Feature", ylab="Input Feature")
if (ncol(dataframe)<20)
{
pander::pander(attr(ERTmod,"UPLTM"),caption="ERT")
}
thesamples <- c(1:nrow(dataframe));
if (nrow(dataframe)>2000)
{
thesamples <- sample(thesamples,2000)
}
classes <- as.integer(scale(dataframe[thesamples,outcome]))
classes <- classes - min(classes) + 1
raincolors <- heat.colors(length(unique(classes)))
dtatoplot <- as.matrix(FRESAScale(dataframe[thesamples,],method="OrderLogit")$scaledData)
datasetframe.umap = umap(dtatoplot,n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: RAW",col=raincolors[classes],pch=15)
gplots::heatmap.2(dtatoplot,
trace = "none",
mar = c(5,5),
col=heat.colors(5),
main = "Raw",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="Z",
xlab="Feature", ylab="Subject")
dtatoplot <- as.matrix(FRESAScale(predict(pc,dataframe[thesamples,]),method="OrderLogit")$scaledData)
datasetframe.umap = umap(dtatoplot,n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PCA",col=raincolors[classes],pch=15)
gplots::heatmap.2(dtatoplot,
trace = "none",
mar = c(5,5),
col=heat.colors(5),
main = "PCA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="Z",
xlab="Feature", ylab="Subject")
if (!inherits(efa,"try-error"))
{
dtatoplot <- as.matrix(FRESAScale(predict(efa,dataframe[thesamples,toPCA]),method="OrderLogit")$scaledData)
datasetframe.umap = umap(dtatoplot,n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: EFA",col=raincolors[classes],pch=15)
gplots::heatmap.2(dtatoplot,
trace = "none",
mar = c(5,5),
col=heat.colors(5),
main = "EFA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="Z",
xlab="Feature", ylab="Subject")
}
rotframe <- as.matrix(scale(dataframe[thesamples,rownames(plm$loadings)])) %*% plm$loadings
dtatoplot <- as.matrix(FRESAScale(rotframe,method="OrderLogit")$scaledData)
datasetframe.umap = umap(dtatoplot,n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PLS",col=raincolors[classes],pch=15)
gplots::heatmap.2(dtatoplot,
trace = "none",
mar = c(5,5),
col=heat.colors(5),
main = "PLS",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="Z",
xlab="Feature", ylab="Subject")
dtatoplot <- as.matrix(FRESAScale(ERTmod[thesamples,colnames(ERTmod) != outcome],method="OrderLogit")$scaledData)
datasetframe.umap = umap(dtatoplot,n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: ERT",col=raincolors[classes],pch=15)
gplots::heatmap.2(dtatoplot,
trace = "none",
mar = c(5,5),
col=heat.colors(5),
main = "ERT",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="Z",
xlab="Feature", ylab="Subject")
plot(predOutcome,testingset[,outcome],xlab="Raw Predicted",ylab=outcome,main="Raw: Age Prediction")
plot(predOutcome_DE,testingset[,outcome],xlab="IDeA Predicted",ylab=outcome,main="IDeA Age Prediction")
pander::pander(demographiDesc[demographiDesc$`Variable Name` %in% colnames(transform),c(1,2)])
| Variable Name | Variable Description | |
|---|---|---|
| 46 | WTINT2YR | Full sample 2 year interview weight. |
| 47 | WTMEC2YR | Full sample 2 year MEC exam weight. |
pander::pander(examinationDesc[examinationDesc$`Variable Name` %in% colnames(transform),c(1,2)])
| Variable Name | Variable Description | |
|---|---|---|
| 12 | BPXML1 | MIL: maximum inflation levels (mm Hg) |
| 109 | BMXARMC | Arm Circumference (cm) |
| 110 | BMXARML | Upper Arm Length (cm) |
| 111 | BMXBMI | Body Mass Index (kg/m**2) |
| 113 | BMXHT | Standing Height (cm) |
| 114 | BMXLEG | Upper Leg Length (cm) |
| 120 | BMXWAIST | Waist Circumference (cm) |
| 121 | BMXWT | Weight (kg) |
| 175 | MGDCGSZ | Combined grip strength (kg): the sum of the largest reading from each hand. |
| 185 | MGXH1T1 | Grip strength (kg), hand 1, test 1 |
| 187 | MGXH1T2 | Grip strength (kg), hand 1, test 2 |
| 189 | MGXH1T3 | Grip strength (kg), hand 1, test 3 |
| 191 | MGXH2T1 | Grip strength (kg), hand 2, test 1 |
| 193 | MGXH2T2 | Grip strength (kg), hand 2, test 2 |
| 195 | MGXH2T3 | Grip strength (kg), hand 2, test 3 |
| 212 | OHX04TC | Tooth Count: Upper right 2nd bicuspid/2nd primary molar (2B) |
| 219 | OHX06TC | Tooth Count: Upper right cuspid (C) |
| 236 | OHX11TC | Tooth Count: Upper left cuspid (C) |
| 244 | OHX13TC | Tooth Count: Upper left 2nd bicuspid/2nd primary molar (2B) |
| 266 | OHX20TC | Tooth Count: Lower left 2nd bicuspid/2nd primary molar (2B) |
| 282 | OHX25TC | Tooth Count: Lower right central incisor (CI) |
| 296 | OHX29TC | Tooth Count: Lower right 2nd bicuspid/2nd primary molar (2B) |
pander::pander(questionnaireDesc[questionnaireDesc$`Variable Name` %in% colnames(transform),c(1,2)])
| Variable Name | Variable Description |
pander::pander(labsDesc[labsDesc$`Variable Name` %in% colnames(transform),c(1,2)])
| Variable Name | Variable Description | |
|---|---|---|
| 5 | LBDBANO | Basophils number (1000 cells/uL) |
| 6 | LBDEONO | Eosinophils number (1000 cells/uL) |
| 7 | LBDLYMNO | Lymphocyte number (1000 cells/uL) |
| 8 | LBDMONO | Monocyte number (1000 cells/uL) |
| 9 | LBDNENO | Segmented neutrophils num (1000 cell/uL) |
| 10 | LBXBAPCT | Basophils percent (%) |
| 11 | LBXEOPCT | Eosinophils percent (%) |
| 12 | LBXHCT | Hematocrit (%) |
| 13 | LBXHGB | Hemoglobin (g/dL) |
| 14 | LBXLYPCT | Lymphocyte percent (%) |
| 15 | LBXMC | Mean cell hemoglobin concentration (g/dL) |
| 16 | LBXMCHSI | Mean cell hemoglobin (pg) |
| 17 | LBXMCVSI | Mean cell volume (fL) |
| 18 | LBXMOPCT | Monocyte percent (%) |
| 19 | LBXMPSI | Mean platelet volume (fL) |
| 20 | LBXNEPCT | Segmented neutrophils percent (%) |
| 21 | LBXPLTSI | Platelet count (1000 cells/uL) |
| 22 | LBXRBCSI | Red blood cell count (million cells/uL) |
| 24 | LBXWBCSI | White blood cell count (1000 cells/uL) |
| 26 | LBDHDD | Direct HDL-Cholesterol (mg/dL) |
| 27 | LBDHDDSI | Direct HDL-Cholesterol (mmol/L) |
| 29 | LBDTCSI | Total Cholesterol( mmol/L) |
| 30 | LBXTC | Total Cholesterol( mg/dL) |
| 1413 | LBXHCT | Hydroxycotinine, Serum (ng/mL) |
| 1452 | URDACT | Albumin creatinine ratio (mg/g) |
| 1453 | URXCRS | Creatinine, urine (umol/L) |
| 1455 | URXUMA | Albumin, urine (ug/mL) |
| 1456 | URXUMS | Albumin, urine (mg/L) |